/* 
This file produces the employer_agency_panel.dta used to generate tables and charts 
relating to employers (see restat_employers.do)

*/

* Session setup: empty the data in memory, turn off output paging, set matsize
clear
set more off
set matsize 100

* From here on every command and every star comment ends with a semicolon
#delimit ;

* Working directory - all data paths below are written relative to this folder;

cd "~/Desktop/migrec_replication/do/";


** 1. build a file with exactly one row per prino;

use "../dta_secure/bad_employers_al.dta", clear;

* (first) keeps the value found on the first row of each prino group;
* NOTE(review): presumably these attributes are constant within prino - confirm;
collapse
    (first)
        new_employer
        bad_employer_pre2009     good_employer_pre2009
        bad_employer_pre2009_25  good_employer_pre2009_25
        bad_employer_pre2009_10  good_employer_pre2009_10
        migrants_pre2009
        emp_comp_rate_pre2009
    , by(prino);

save "../dta_secure/1_prino.dta", replace;



** 2. attach the prino-level variables from 1_prino.dta to final.dta;

use "../dta_secure/final.dta", clear;

* m:1 - prino may repeat in final.dta but must be unique in the using file;
* no keep() option is given, so unmatched rows from both files remain in memory;
* prino_merge records the match status of every row;
merge m:1 prino using "../dta_secure/1_prino.dta", generate(prino_merge);


** 3. aggregate to one row per agency_id - prino - departure_year cell;

* every row in memory gets migrant = 1, so (sum) migrant is the row count of the cell;
generate migrant = 1;

* salary and job code variables are averaged within the cell;
* the employer attributes and an take the value from the first row of the cell;
collapse
    (sum)   migrant
    (mean)  usd_salary usd_salary_d job_code_domestic
    (first) new_employer
            bad_employer_pre2009_10  good_employer_pre2009_10
            bad_employer_pre2009_25  good_employer_pre2009_25
            bad_employer_pre2009     good_employer_pre2009
            migrants_pre2009
            emp_comp_rate_pre2009
            an
    , by(agency_id prino departure_year);



** 4. bring in the agency ratings;

* the ratings file is merged on license_no, so an is renamed to that key name;
rename an license_no;

* no keep() option - rows found only in the ratings file are added as well;
merge m:1 license_no using "../dta_secure/report_ratings2012", generate(ratingsdtamerge);

* elig is 1 when performance_rate is positive and not system missing, 0 otherwise;
* it is created before the typo correction below, which does not change the sign;
generate elig = (performance_rate != . & performance_rate > 0);


** misc;

* correct pr typos - any performance_rate above 90 is divided by 100;
* NOTE(review): this also rescales any legitimate value above 90 - confirm the scale;
replace performance_rate = performance_rate / 100 if performance_rate > 90;

* convert departure_year to numeric if it is stored as a string;
destring departure_year, replace;


** 5. flag prino-agency pairs seen both before and after the program;

* Each year from 2010 to 2015 is compared with the pre-period on its own;
* The pre-period is every row with departure_year <= 2009;
* A non-zero tag on a row of the later year can then only come from a pre-period row,
  provided the data hold at most one row per agency_id - prino - year;
* Combining the yearly flags gives matches across longer periods;
* The distinct prino among flagged rows are the unique matches;

local post_flags "";

forvalues yr = 2010/2015 {;

    * the tag counts the other rows with the same agency_id and prino
      among pre-period rows plus rows of this year - it is missing for all other rows;
    duplicates tag agency_id prino
        if (departure_year <= 2009 | departure_year == `yr'),
        generate(match_`yr'z);

    * flag is 1 only on rows of this year that have at least one duplicate;
    generate match_`yr' = (departure_year == `yr' & match_`yr'z > 0 & match_`yr'z != .);

    local post_flags `post_flags' match_`yr';

};

* row-wise maximum of the yearly flags - 1 if any of the listed years is flagged;
egen match_10_11 = rowmax(match_2010 match_2011);
egen match_10_15 = rowmax(`post_flags');


* merge in agency details;
* keep(3) retains only rows whose agency_id is present in both files;

merge m:1 agency_id using "../dta_secure/treatment_assignment_042919.dta",
    keep(3) generate(treat_merge);


* tag for matched agencies;
* matched spreads the maximum of match_10_15 over all rows of a prino - agency_id pair;

by prino agency_id, sort: egen matched = max(match_10_15) if match_10_15 != . ;

label variable matched "1 if prino worked with an agency before and after program for all obs";


* merge in prino*agency_id*year level job order data;
* the using file names the year variable year, so departure_year is renamed first;

rename departure_year year;

* 1:1 requires prino - license_no - year to identify rows uniquely in both files;
merge 1:1 prino license_no year using "../dta_secure/firm_emp_yr.dta",
    generate(joborderdetailsmerge);




** misc;

* remove rows with a missing agency_id or an empty prino;
drop if agency_id == . | prino == "";


* no keep() option - agencies found only in the using file are added as new rows;
merge m:1 agency_id using "../dta_secure/agencies_predicted_rating.dta",
    generate(predicted_merge);

* winsorized copies of the two salary variables - the originals are left unchanged;
* NOTE(review): winsor is not an official Stata command, presumably the SSC package - verify it is installed;
winsor usd_salary,   p(0.025) gen(usd_salary_win);
winsor usd_salary_d, p(0.025) gen(usd_salary_win_d);


* new_employer is forced to 0 on every row dated before 2010;
replace new_employer = 0 if year < 2010;

* comp_opt is 1 when elig_migs lies between 45 and 155 inclusive, 0 otherwise;
* a missing elig_migs gives 0;
generate comp_opt = inrange(elig_migs, 45, 155);


save "../dta_secure/employer_agency_panel.dta", replace;

